AN EXAMPLE: GDP AND LIFE EXPECTANCY

# Install gapminder only when it is missing, so re-running the script does not
# download the package again every time. The package is fetched over HTTPS
# rather than plain HTTP.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder", repos = "https://cloud.r-project.org")
}
## Installing package into '/Users/Leona/Library/R/3.5/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/4h/vqb8hww915jc23y3mxgq2y8w0000gn/T//RtmpUIvVxF/downloaded_packages
library(ggplot2)
library(gapminder)

# Excerpt of the Gapminder data on life expectancy, GDP per capita, and
# population by country.
# Look at the data: evaluating the object at top level prints the tibble.

gapminder
## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows
# Compact structure of the data: classes, dimensions, and each column's type.
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1704 obs. of  6 variables:
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# Per-column summary: level counts for the factor columns, and
# min / quartiles / mean / max for the numeric columns.
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 

GGPlot

1st layer: Data

# Store the base plot (data plus aesthetic mapping, no layers yet) in an object.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)
p

# Add a point layer to the stored base plot to get a scatter plot.
p + geom_point()

# The same scatter plot, written as a single expression.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()

Log Transformation

# What is the difference? log() is the natural logarithm (ln), while log10()
# is base 10. The two differ only by a constant factor, so the point cloud
# keeps its shape and only the x-axis values change.
ggplot(data = gapminder, aes(x = log(gdpPercap), y = lifeExp)) +
  geom_point() # natural log (ln), not log10

ggplot(data = gapminder, aes(x = log10(gdpPercap), y = lifeExp)) +
  geom_point()

# Base plot on the natural-log scale, kept in `p` so later sections can add
# layers to it.
p <- ggplot(data = gapminder, aes(x = log(gdpPercap), y = lifeExp))

# Let the scale do the transformation: map the raw gdpPercap and apply
# scale_x_log10() once. Wrapping x in log10() as well would take the log twice.
p1 <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

Color

# Colour the points by continent on top of the stored base plot `p`.
p + geom_point(aes(color = continent))

# Same colouring on a log10 x-axis. gdpPercap is mapped untransformed because
# scale_x_log10() already applies the log10 transform; combining it with
# log10() inside aes() would log-transform the values twice.
p2 <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent))
p2

Reduce overplotting

?geom_point

# Counter-example: alpha and size are placed inside aes(), so ggplot2 treats
# 0.3 and 3 as data to be mapped through scales (and adds legend entries for
# them) instead of using them as fixed point properties.
# x is the raw gdpPercap: scale_x_log10() already applies the log10 transform,
# so wrapping x in log10() as well would take the log twice.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent, alpha = 0.3, size = 3))

# Intended form: only color is mapped to a variable; alpha and size are set
# as constants outside aes(), which makes every point semi-transparent and
# reduces overplotting.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent), alpha = 0.3, size = 3)

# Axis-only comparison: no geom layer is added to either plot.
# Here x is wrapped in log10() and scale_x_log10() is applied as well, so the
# log10 transform is taken twice.
# NOTE(review): presumably kept on purpose as a contrast to the next call — confirm.
ggplot(data = gapminder, aes(x= log10(gdpPercap), y = lifeExp)) + scale_x_log10()

# Here the raw gdpPercap is mapped and scale_x_log10() alone handles the log.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) + scale_x_log10()